import os
import sys
from collections import defaultdict

#function to find the mode of a tag-defined read group
def find_mode_by_degen(thelist):
	"""Return the mode of ``thelist`` together with its support.

	Args:
		thelist: sequence of hashable values to take the mode of.

	Returns:
		A tuple ``(mode, mode_count, total)`` where ``mode`` is the most
		frequent value, ``mode_count`` is how many times it occurs and
		``total`` is ``len(thelist)``.

		``(-1, -1, -1)`` is returned instead when the list is empty, when
		more than one value shares the highest count, or when the mode
		occurs two times or fewer.
	"""
	counts = {}
	for item in thelist:
		counts[item] = counts.get(item, 0) + 1

	#initialised outside the counting loop so an empty list cannot leave
	#these names unbound
	maximumcount = 0
	maximumitem = None
	for k, v in counts.items():
		if v > maximumcount:
			maximumitem = k
			maximumcount = v

	#list() keeps .count() available where dict.values() returns a view
	#rather than a list
	n_modes = list(counts.values()).count(maximumcount)

	#return -1 unless there is exactly one mode (zero modes = empty input)
	if n_modes != 1:
		return -1, -1, -1
	#return -1 if there are two or fewer reads supporting the mode
	if maximumcount <= 2:
		return -1, -1, -1
	return maximumitem, maximumcount, len(thelist)
		
#for a given STR target, how many tag-defined read groups support each mode
def find_modes(thelist):
	"""Tally how many times each value occurs in ``thelist``.

	Args:
		thelist: iterable of hashable mode values.

	Returns:
		A plain dict mapping each distinct value to its number of
		occurrences; empty when ``thelist`` is empty.
	"""
	tally = {}
	for value in thelist:
		tally[value] = tally.get(value, 0) + 1
	return tally



#usage: <script> <input file> <accession> <mip name>
#read the input file; its first line is a header and is not processed
with open(sys.argv[1], "r") as myfile:
	header = myfile.readline()
	mylist = myfile.readlines()
acc = str(sys.argv[2])
mip = str(sys.argv[3])
#drop the last 8 characters of the mip argument
mip = mip[:-8]

#group the second column (an integer) by the first column (the tag)
tag_to_list = defaultdict(list)
for line in mylist:
	sline = line.rstrip().split()
	if not sline:
		#skip blank lines (e.g. a trailing newline) instead of failing on sline[0]
		continue
	tag_to_list[sline[0]].append(int(sline[1]))


#can change strictness here!
#a tag's mode is kept only when more than this fraction of the tag's reads support it
min_mode_fraction = .66
mode_list = []
nreads_with_mode_list = []
total_reads_per_tag_list = []
for tag, copies in tag_to_list.items():
	mode, nreads, total = find_mode_by_degen(copies)
	if int(nreads) > min_mode_fraction * int(total):
		mode_list.append(mode)
	else:
		#tags failing the cutoff land here, including the -1,-1,-1 result
		#find_mode_by_degen returns when it finds no usable mode
		nreads_with_mode_list.append(nreads)
		total_reads_per_tag_list.append(total)

#outputs a text file with three columns: the target, the unit number, the number of tag-defined read groups that support that unit number
modes_dict = find_modes(mode_list)

#put in the right place!
#open() does not expand "~" by itself, so resolve the home directory explicitly
outpath = os.path.expanduser("~/MIPSTR_analysis/Accessions/%s/multimodes/%s_multimodes.txt" % (acc, mip))
with open(outpath, "w") as outfile:
	for mode, count in modes_dict.items():
		outfile.write("%s %d %d\n" % (mip, mode, count))


